/**********************************************************************
DO FILE SUMMARY

This file appends and cleans the REDCap files for the K23 pilot. I've consolidated
all of the analysis into a single do file. Here are the underlying projects and
analyses covered below:

1. Cuestionario estructurado para proveedores y autoridades de salud
2. Visita basal servicios de salud Hytrec plus
3. Visita mensual a servicio de salud
4. Combining visita basal and visita mensual to calculate pre-post differences
5. Auditoria DHIS2
6. Training attendance data analysis
7. Cuestionario para participantes
8. Ingreso datos SIGSA y registro diario - Chiquimula
9. DHIS2 data analysis with SIGSA - Sololá

Note that I exported from REDCap the do files and CSV files. I then ran the
do files and saved the data in dta format. Only the dta files are analyzed here. 
The REDCap automatically generated cleaning do files are unchanged and can be
found in the underlying Dropbox folder.
**********************************************************************/

* Session setup: drop the data in memory, clear the Results window, pin the
* interpreter to version 18 behavior, turn off -more- pauses, and close any log
* that is still open (capture suppresses the error when no log is open)
clear
cls
version 18
set more off
capture log close

* Set graph options and scheme
set scheme s1color
graph set window fontface "Arial"

* Set directory 
cd "/Users/dcflood/Library/CloudStorage/Dropbox-UniversityofMichigan/David Flood/"

* Use the macro $S_DATE to save files with current date
* (the date is formatted as CCYY.NN.DD and stored in the global macro "date")
global date : di %tdCCYY.NN.DD date("$S_DATE","DMY")
* The log path is relative to the directory set by -cd- above; -replace-
* overwrites an existing log that carries the same date stamp
log using "HEARTS piloto 2023/Datos - SIGSA/Reportes SIGSA/Stata do files/K23 pilot analysis/Log files/log_redcap_${date}.log", replace

/*******************************************************************************
1. Cuestionario estructurado para proveedores y autoridades de salud
*******************************************************************************/

use "/Users/dcflood/Library/CloudStorage/Dropbox-UniversityofMichigan/David Flood/HEARTS piloto 2023/Datos - REDCap/Cuestionario estructurado para proveedores y autoridades de salud/CuestionarioEstructu_DATA_NOHDRS_2024-08-26_0840.dta", clear

* Data entry correction: this respondent is from Solola (code 1 in e9),
* per Luis email of 22 Aug 2024
	replace e9 = 1 if e2 == "Nidia Castillo"

* Characteristics of MOH participants: give the raw REDCap fields readable names

	rename (e5 e7a e8 e9 e11 e13) ///
		(role direct_care years_service district prior_heartsd_training idioma_maya)

* Feasibility of intervention measure (FIM): items e18a-e18d

	rename (e18a e18b e18c e18d) (fim1 fim2 fim3 fim4)

	* Respondent-level score = mean of the items that are non-missing in the row
	egen fim_mean = rowmean(fim1 fim2 fim3 fim4)
	summarize fim_mean, detail

* Acceptability of intervention measure (AIM): items e19a-e19d

	rename (e19a e19b e19c e19d) (aim1 aim2 aim3 aim4)

	* Respondent-level score = mean of the items that are non-missing in the row
	egen aim_mean = rowmean(aim1 aim2 aim3 aim4)
	summarize aim_mean, detail
	
* System Usability Scale (SUS)

	/* From Brooke 1996 paper
	
	Scoring SUS
	SUS yields a single number representing a composite measure of the overall usability of the
	system being studied. Note that scores for individual items are not meaningful on their own.
	To calculate the SUS score, first sum the score contributions from each item. Each item's
	score contribution will range from 0 to 4. For items 1,3,5,7,and 9 the score contribution is the
	scale position minus 1. For items 2,4,6,8 and 10, the contribution is 5 minus the scale position.
	Multiply the sum of the scores by 2.5 to obtain the overall value of SU.
	SUS scores have a range of 0 to 100.	*/
	
	
	* Note: There are 2 separate scores, 1 for DHIS in Sololá and 1 for the boletas
	
	* Implementation note: the ten item contributions are always referenced as
	* "<prefix>_scale? <prefix>_scale10". The ? wildcard matches exactly one
	* character, so the list covers items 1-9 plus item 10 and can never pick up
	* the helper variables <prefix>_scale_rowmean / <prefix>_scale_rowtotal.
	* (A trailing * wildcard would also match _rowmean and add an 11th term to
	* the total, pushing the score above its 0-100 range.)
	
	* DHIS 2 SUS score ---------------------------------------------------------
	rename (e21a e21b e21c e21d e21e e21f e21g e21h e21i e21j) ///
		(sus_dhis1 sus_dhis2 sus_dhis3 sus_dhis4 sus_dhis5 ///
		 sus_dhis6 sus_dhis7 sus_dhis8 sus_dhis9 sus_dhis10)
	
	* Item contributions (0-4): odd items = position - 1, even items = 5 - position.
	* Responses stored as 99 are treated as missing.
	foreach n of numlist 1 3 5 7 9 {
		gen sus_dhis_scale`n' = sus_dhis`n' - 1
		replace sus_dhis_scale`n' = . if sus_dhis`n' == 99
	}

	foreach n of numlist 2(2)10 {
		gen sus_dhis_scale`n' = 5 - sus_dhis`n'
		replace sus_dhis_scale`n' = . if sus_dhis`n' == 99
	}
	
	* Impute missing items with the respondent's mean over the answered items
	egen sus_dhis_scale_rowmean = rowmean(sus_dhis_scale? sus_dhis_scale10)
	
	foreach n of numlist 1(1)10 {
		replace sus_dhis_scale`n' = sus_dhis_scale_rowmean if sus_dhis_scale`n' == .
	}
	
	* Sum the ten item contributions (helper variables excluded) and multiply by
	* 2.5 for the final 0-100 score. Respondents with no answered item
	* (row mean missing) get no score.
	egen sus_dhis_scale_rowtotal = rowtotal(sus_dhis_scale? sus_dhis_scale10) if !missing(sus_dhis_scale_rowmean)
	replace sus_dhis_scale_rowtotal = sus_dhis_scale_rowtotal * 2.5
	replace sus_dhis_scale_rowtotal = . if district == 2 // dhis only in Solola
			
	sum sus_dhis_scale_rowtotal, d
	
	* Boletas SUS score --------------------------------------------------------
	
	rename (e27a e27b e27c) (sus_boletas1 sus_boletas2 sus_boletas3)
	gen sus_boletas4  = . // did not ask this question; imputed below with the row mean
	rename (e27e e27f e27g e27h e27i e27j) ///
		(sus_boletas5 sus_boletas6 sus_boletas7 sus_boletas8 sus_boletas9 sus_boletas10)
	
	* Item contributions, same rules as for the DHIS 2 score
	foreach n of numlist 1 3 5 7 9 {
		gen sus_boletas_scale`n' = sus_boletas`n' - 1
		replace sus_boletas_scale`n' = . if sus_boletas`n' == 99
	}

	foreach n of numlist 2(2)10 {
		gen sus_boletas_scale`n' = 5 - sus_boletas`n'
		replace sus_boletas_scale`n' = . if sus_boletas`n' == 99
	}
	
	* Impute missing items (including the never-asked item 4) with the row mean
	egen sus_boletas_scale_rowmean = rowmean(sus_boletas_scale? sus_boletas_scale10)
	
	foreach n of numlist 1(1)10 {
		replace sus_boletas_scale`n' = sus_boletas_scale_rowmean if sus_boletas_scale`n' == .
	}
	
	* Sum the ten item contributions (helper variables excluded) and multiply by 2.5
	egen sus_boletas_scale_rowtotal = rowtotal(sus_boletas_scale? sus_boletas_scale10) if !missing(sus_boletas_scale_rowmean)
	replace sus_boletas_scale_rowtotal = sus_boletas_scale_rowtotal * 2.5
	replace sus_boletas_scale_rowtotal = . if district == 1 // boletas score is blanked for Solola (district 1), which used DHIS 2
			
	sum sus_boletas_scale_rowtotal, d
	
	* Compare usability scores (two-sample test; the two scores are held by
	* different respondents because each is blanked for one district)
	ttest sus_dhis_scale_rowtotal == sus_boletas_scale_rowtotal, unpaired
	
* Sustainability assessment (questionnaire items 22a-22f)
*
*	22a. The program had support from MSPAS leaders.
*	22b. The program has the appropriate staff to accomplish its goals.
*	22c. The treatment protocol is easy to use for physicians/nurses/auxiliary nurses.
*	22d. The protocol is integrated with established clinical practices.
*	22e. Program implementation clearly defines the role and responsibilities of all staff.
*	22f. Staff receive ongoing support, feedback and training.

	rename (e22a e22b e22c e22d e22e e22f) ///
		(sustainability1 sustainability2 sustainability3 ///
		 sustainability4 sustainability5 sustainability6)

	* Items 22a-22e are summarized as stored
	forvalues item = 1/5 {
		summarize sustainability`item', detail
	}

	* Item 22f: stored codes 5, 6 and 7 are shifted down to 4, 5 and 6 before
	* summarizing (all other codes are left unchanged)
	recode sustainability6 (5=4) (6=5) (7=6)
	summarize sustainability6, detail
	
/*******************************************************************************
2. Visita basal servicios de salud Hytrec plus
*******************************************************************************/

	/* See p. 11 of MOPS for codebook on clinics

		11 C/S Esquipulas
		12 P/S Las Peñas
		13 P/S Chanmagua
		14 P/S Timushan
		15 P/S Horcones
		21 C/S San Pablo la Laguna
		32 C/S San Marcos la Laguna
		43 C/S Santa Cruz la Laguna
		44 P/S Jaibalito
		45 Centro de Convergencia Chuitzanchaj
		46 C/S Tzununá
	*/

* Baseline health facility data

use "/Users/dcflood/Library/CloudStorage/Dropbox-UniversityofMichigan/David Flood/HEARTS piloto 2023/Datos - REDCap/Visita basal servicios de salud Hytrec plus/VisitaBasalServicios_DATA_NOHDRS_2024-08-26_0729.dta", clear

* Exclusions: 45 is Chuitzanchaj (a Centro de Convergencia); 234 and 999999
* are ids of unknown origin
drop if inlist(id_ss, 45, 234, 999999)

* Readable names for the facility descriptors
rename (vb2 vb3 vb46 vb47 vb48 vb49) ///
	(distrito facility_type computer phone internet consult_records)

tab distrito            // health region
tab facility_type       // type of health facility
tab computer, m         // functioning computer (missing values shown)
tab phone               // functioning mobile phone or tablet
tab internet            // functioning internet
tab consult_records     // records retrieved and consulted at each diabetes/hypertension visit

* vb105: collaborative team meeting on hypertension/diabetes patients held in the last month
* vb4:   most common type of access to the facility from the municipal center
foreach item of varlist vb105 vb4 {
	tab `item'
}

* Physical infrastructure
*	vb52 space to store patient records | vb53 designated pharmacy space | vb54 number of clinic rooms
foreach item of varlist vb52 vb53 vb54 {
	tab `item'
}

* Staffing at the health facility (at least one full-time staff in each role)
*	vb57 physician | vb59 medical student | vb55 professional nurse | vb60 auxiliary nurses
*	vb62 laboratory technician | vb64 nutritionist | vb66 psychologists
foreach item of varlist vb57 vb59 vb55 vb60 vb62 vb64 vb66 {
	tab `item'
}

* Availability of key medications
*	vb70 enalapril | vb76 losartan | vb83 hydrochlorothiazide
*	vb89 metformin | vb96 glimepiride | vb102 insulin
foreach item of varlist vb70 vb76 vb83 vb89 vb96 vb102 {
	tab `item'
}

	* Overall availability of core meds: average of five hard-coded proportions
	* (presumably copied by hand from the tabulations above - verify when the data change)

		di (.6+.5+.7+.6+.6)/5

* Availability of functioning key equipment and supplies, n (%)
*	vb104a glucometer | vb104b glucometer test strips | vb104c urine test strips
*	vb104d hemoglobin A1c tests | vb104e cholesterol tests | vb104f serum creatinine tests
*	vb104g adult weight scale | vb104h measuring tape or stadiometer board | vb104i stethoscope
*	vb104j digital blood pressure apparatus | vb104k manual sphygmomanometer
*	vb104l refrigerator for storage of medicines and supplies
foreach item of varlist vb104a vb104b vb104c vb104d vb104e vb104f ///
		vb104g vb104h vb104i vb104j vb104k vb104l {
	tab `item'
}

	* Overall availability of core diagnostics: average of three hard-coded proportions
	* (presumably copied by hand from the tabulations above - verify when the data change)

		di (.8+.8+1)/3
		
/*******************************************************************************
3. Visita mensual a servicio de salud
*******************************************************************************/

use "/Users/dcflood/Library/CloudStorage/Dropbox-UniversityofMichigan/David Flood/HEARTS piloto 2023/Datos - REDCap/Visita mensual a servicio de salud/VisitaMensualAServic_DATA_NOHDRS_2024-08-26_0735.dta", clear

drop if id_ss == 45 // delete Chuitzanchaj as it is a Centro de Convergencia

* Six-month totals per facility ------------------------------------------------
*
* Each series is stored as six monthly variables: <stem>, <stem>_m2, ... <stem>_m6.
* For every series the loop builds <name>_rowtotal (row sum over the six months;
* rowtotal counts missing months as 0), then prints the grand total and the
* grand total divided by 10*6. The denominator is hard-coded and assumes no
* missing values.
*
*	vm58 collaborative team meetings      vm14 enalapril     vm20 losartan
*	vm26 hydrochlorothiazide              vm32 metformin     vm38 glimepiride
*	vm44 insulin

	local series_name collab enalapril losartan hydrochlorothiazide metformin glimepiride insulin
	local series_stem vm58 vm14 vm20 vm26 vm32 vm38 vm44

	forvalues k = 1/7 {
		local label : word `k' of `series_name'
		local stem  : word `k' of `series_stem'

		egen `label'_rowtotal = rowtotal(`stem' `stem'_m2 `stem'_m3 `stem'_m4 `stem'_m5 `stem'_m6)

		* Calculate the availability
		summarize `label'_rowtotal
		local sum_variable = r(sum)
		di `sum_variable'
		di `sum_variable' / (10*6) // assumes no missing
	}

* Pooled totals (insulin is not part of any pooled total) ---------------------

	local htn_totals enalapril_rowtotal losartan_rowtotal hydrochlorothiazide_rowtotal
	local dm_totals  metformin_rowtotal glimepiride_rowtotal

	* Antihypertensive medications (3 drugs)
	egen overall_rowtotal_htn_meds = rowtotal(`htn_totals')
	summarize overall_rowtotal_htn_meds
	local sum_variable = r(sum)
	di `sum_variable'
	di `sum_variable' / (10*6*3) // assumes no missing

	* Diabetes medications (2 drugs)
	egen overall_rowtotal_dm_meds = rowtotal(`dm_totals')
	summarize overall_rowtotal_dm_meds
	local sum_variable = r(sum)
	di `sum_variable'
	di `sum_variable' / (10*6*2) // assumes no missing

	* All five core medications
	egen overall_rowtotal_meds = rowtotal(`htn_totals' `dm_totals')
	summarize overall_rowtotal_meds
	local sum_variable = r(sum)
	di `sum_variable'
	di `sum_variable' / (10*6*5) // assumes no missing

* Check missingness: number of non-missing months per facility for each of the
* five core medications (enalapril, losartan, HCTZ, metformin, glimepiride)

	foreach stem in vm14 vm20 vm26 vm32 vm38 {
		egen nonmiss_`stem' = rownonmiss(`stem' `stem'_m2 `stem'_m3 `stem'_m4 `stem'_m5 `stem'_m6)
	}

	foreach stem in vm14 vm20 vm26 vm32 vm38 {
		summarize nonmiss_`stem'
	}
		
* Availability of key diagnostics ----------------------------------------------
*
* Each item is stored as six monthly variables: <stem>, <stem>_m2, ... <stem>_m6.
*	vm47 glucometer | vm48 tiras (test strips) | vm56 digital blood pressure cuffs
*
* For every item the loop:
*	1. builds a 0/1 copy of each monthly variable (<var>_new): 1 when the
*	   original equals 1, 0 for any other value that is not ".", "." otherwise;
*	2. sums the six indicators per facility into <name>_rowtotal
*	   (rowtotal counts missing months as 0);
*	3. prints the grand total and the grand total divided by 10*6
*	   (hard-coded denominator, assumes no missing values).

	local diag_name glucometer tiras digitalcuff
	local diag_stem vm47 vm48 vm56

	forvalues k = 1/3 {
		local label : word `k' of `diag_name'
		local stem  : word `k' of `diag_stem'

		foreach var of varlist `stem' `stem'_m2 `stem'_m3 `stem'_m4 `stem'_m5 `stem'_m6 {
			gen `var'_new = .
			replace `var'_new = 1 if `var' == 1
			replace `var'_new = 0 if `var' !=1 & `var' != .
		}

		egen `label'_rowtotal = rowtotal(`stem'_new `stem'_m2_new `stem'_m3_new `stem'_m4_new `stem'_m5_new `stem'_m6_new)

		* Calculate the availability
		summarize `label'_rowtotal
		local sum_variable = r(sum)
		di `sum_variable'
		di `sum_variable' / (10*6) // assumes no missing
	}

* Proportion of availability of core supplies and diagnostics (3 items pooled)
	egen overall_rowtotal_supplies = rowtotal(glucometer_rowtotal tiras_rowtotal digitalcuff_rowtotal)

		* Calculate the availability
		summarize overall_rowtotal_supplies
		local sum_variable = r(sum)
		di `sum_variable'
		di `sum_variable' / (10*6*3) // assumes no missing

		* Check missingness: number of non-missing months per facility for
		* each diagnostic item
		egen nonmiss_vm47 = rownonmiss(vm47 vm47_m2 vm47_m3 vm47_m4 vm47_m5 vm47_m6) // Glucometer
		egen nonmiss_vm48 = rownonmiss(vm48 vm48_m2 vm48_m3 vm48_m4 vm48_m5 vm48_m6) // Tiras
		egen nonmiss_vm56 = rownonmiss(vm56 vm56_m2 vm56_m3 vm56_m4 vm56_m5 vm56_m6) // Digital cuff

		* Summarize the three diagnostic counters created just above
		* (not the medication counters nonmiss_vm20 / nonmiss_vm26)
		foreach var of varlist nonmiss_vm47 nonmiss_vm48 nonmiss_vm56 {
			sum `var'
		}
		
/*******************************************************************************
4. Combining visita basal and visita mensual to calculate pre-post differences
*******************************************************************************/

use "/Users/dcflood/Library/CloudStorage/Dropbox-UniversityofMichigan/David Flood/HEARTS piloto 2023/Datos - REDCap/Visita mensual a servicio de salud/VisitaMensualAServic_DATA_NOHDRS_2024-08-26_0735.dta", clear

* Attach the baseline visit to the monthly visits by facility id
merge 1:m id_ss using "/Users/dcflood/Library/CloudStorage/Dropbox-UniversityofMichigan/David Flood/HEARTS piloto 2023/Datos - REDCap/Visita basal servicios de salud Hytrec plus/VisitaBasalServicios_DATA_NOHDRS_2024-08-26_0729.dta"

* Exclusions: 45 is Chuitzanchaj (a Centro de Convergencia); 234 and 999999
* are ids of unknown origin
drop if inlist(id_ss, 45, 234, 999999)

* Put every series on a common naming scheme <stub><month>:
*	month 0    = baseline visit variable (vb*)
*	months 1-6 = monthly visit variables (<vm>, <vm>_m2, ... <vm>_m6)
*
*	stub          baseline   monthly
*	collab        vb105      vm58     collaborative team meetings
*	enalapril     vb70       vm14
*	losartan      vb76       vm20
*	hctz          vb83       vm26
*	metformin     vb89       vm32
*	glimepiride   vb96       vm38
*	glucometer    vb104a     vm47
*	tiras         vb104b     vm48
*	digitalcuff   vb104j     vm56
local stub_list     collab enalapril losartan hctz metformin glimepiride glucometer tiras digitalcuff
local baseline_list vb105 vb70 vb76 vb83 vb89 vb96 vb104a vb104b vb104j
local monthly_list  vm58 vm14 vm20 vm26 vm32 vm38 vm47 vm48 vm56

forvalues k = 1/9 {
	local stub : word `k' of `stub_list'
	local bvar : word `k' of `baseline_list'
	local mvar : word `k' of `monthly_list'

	rename `bvar' `stub'0
	rename `mvar' `stub'1
	forvalues m = 2/6 {
		rename `mvar'_m`m' `stub'`m'
	}
}

* Keep the renamed series and reshape to one row per facility-month, the layout
* needed for the mixed effects regressions
keep id_ss enalapril* losartan* hctz* metformin* glimepiride* glucometer* tiras* digitalcuff* collab*
reshape long enalapril losartan hctz metformin glimepiride glucometer tiras digitalcuff collab, i(id_ss) j(month)

* Implementation period indicator: 1 for months 1-6, 0 for the baseline month
gen implementation = inrange(month,1,6)

* Pre-post difference in overall meds: facility-month mean over the five core
* medications, random intercept for facility
egen clinic_month_mean_meds = rowmean(enalapril losartan hctz metformin glimepiride)

mixed clinic_month_mean_meds i.implementation || id_ss: // no random or fixed effects for month
margins implementation, base
margins, dydx(implementation)

* Pre-post difference in overall diagnostics: first build 0/1 indicators
* (1 when the item equals 1, 0 for any other value that is not ".")

	foreach var of varlist glucometer tiras digitalcuff {
		gen `var'_new = (`var' == 1) if `var' != .
	}

egen clinic_month_mean_diag = rowmean(glucometer_new tiras_new digitalcuff_new)

mixed clinic_month_mean_diag i.implementation || id_ss: // no random or fixed effects for month
margins implementation, base
margins, dydx(implementation)

* Pre-post difference in collaborative team meetings

mixed collab i.implementation || id_ss:  // no random or fixed effects for month
margins implementation, base
margins, dydx(implementation)

/*******************************************************************************
5. Auditoria DHIS2
*******************************************************************************/

use "/Users/dcflood/Library/CloudStorage/Dropbox-UniversityofMichigan/David Flood/HEARTS piloto 2023/Datos - REDCap/Auditoria DHIS2/AuditoriaDHIS2_DATA_NOHDRS_2024-08-26_0737.dta", clear

* appropriate = 1 when hypertension (a10 == 1) is rated 1 or 2 on a11, or
* diabetes (a13 == 1) is rated 1 or 2 on a14
gen appropriate = 1 if (a10 == 1 & inlist(a11,1,2)) | (a13 == 1 & inlist(a14,1,2))

* A rating of 4 for either disease overrides the above, so a record with both
* diseases is coded as not appropriate if either disease is inappropriate
replace appropriate = 0 if (a10 == 1 & a11 == 4) | (a13 == 1 & a14 == 4)

tab appropriate

// Count the distinct attendee names (column Nombre) in each of the four
// pre/post-test workbooks. Each import replaces the data in memory, so the
// counts are stored in locals and printed once all files have been read.
local training_dir "/Users/dcflood/Library/CloudStorage/Dropbox-UniversityofMichigan/David Flood/HEARTS piloto 2023/Datos - Training attendance"
local tags solola1 solola2 esquipulas1 esquipulas2

local k = 0
foreach session in "Solola #1" "Solola #2" "Esquipulas #1" "Esquipulas #2" {
	local ++k
	local tag : word `k' of `tags'

	import excel "`training_dir'/Resultados pre y post test - `session'.xlsx", sheet("Respuestas de formulario 1") firstrow clear

	// One row per distinct name; the row count is then the number of unique names
	keep Nombre
	duplicates drop
	local unique_`tag' = _N
}

// Display the results
display "Unique names in Solola #1: " `unique_solola1'
display "Unique names in Solola #2: " `unique_solola2'
display "Unique names in Esquipulas #1: " `unique_esquipulas1'
display "Unique names in Esquipulas #2: " `unique_esquipulas2'

// Sum of the four per-file counts. Names are de-duplicated within each file
// only, so a name that appears in two files is counted twice here.
local total_unique_names = `unique_solola1' + `unique_solola2' + `unique_esquipulas1' + `unique_esquipulas2'
display "Total unique names across all files: " `total_unique_names'

/*******************************************************************************
6. Training attendance data analysis
*******************************************************************************/

* Load the consolidated attendance workbook (first row holds the variable names)
import excel "/Users/dcflood/Library/CloudStorage/Dropbox-UniversityofMichigan/David Flood/HEARTS piloto 2023/Datos - Training attendance/training_attendance_consolidated.xlsx", sheet("Sheet1") firstrow clear

* Sort ascending by attendee name
gsort + nombre

* Report how many copies of each name exist in the consolidated sheet
duplicates report nombre // n=59 unique

* NOTE(review): hard-coded arithmetic (20 divided by 2); the inputs are not
* derived from the data loaded above - confirm where the 20 comes from
di 20/2 // participation per district

/*******************************************************************************
7. Cuestionario para participantes
*******************************************************************************/

// use "/Users/dcflood/Library/CloudStorage/Dropbox-UniversityofMichigan/David Flood/HEARTS piloto 2023/Datos - REDCap/Cuestionario para participantes/CuestionarioParaPart_DATA_NOHDRS_2024-08-26_0733.dta", clear
//
// drop if id_participante == 0
//
// * Distress
// gen distress = (c9 + c10)/2
// sum distress, d

/*******************************************************************************
8. Ingreso datos SIGSA y registro diario - Chiquimula
*******************************************************************************/

/* This code is not elegant but basically we merge the SIGSA and REDCap data
doing a "fuzzy" merge on the names but require exact matches for the visit date and
health facility. */

use "/Users/dcflood/Library/CloudStorage/Dropbox-UniversityofMichigan/David Flood/HEARTS piloto 2023/Datos - REDCap/Ingreso datos SIGSA y registro diario - Chiquimula/IngresoDatosSIGSAYRe_DATA_NOHDRS_2024-09-03_1233.dta", clear

* Only the facility id, visit date, patient name and biomarker fields are needed
keep id_serviciosalud ds2 ds3 ds5 ds6_1 ds6_2 ds7 ds10

	/* See p. 11 of MOPS for codebook on clinics

		11 C/S Esquipulas
		12 P/S Las Peñas
		13 P/S Chanmagua
		14 P/S Timushan
		15 P/S Horcones
		21 C/S San Pablo la Laguna
		32 C/S San Marcos la Laguna
		43 C/S Santa Cruz la Laguna
		44 P/S Jaibalito
		45 Centro de Convergencia Chuitzanchaj
		46 C/S Tzununá
	*/

* Readable names (ds5, the patient name, is renamed after its encoding is repaired)
rename (id_serviciosalud ds3 ds6_1 ds6_2 ds7 ds10) ///
	(health_facility visit_date sbp dbp glucose fasting)

	* Attach value labels for facility codes 11-15 and tabulate
	label define health_facility_label ///
		11 "C/S Esquipulas" ///
		12 "P/S Las Peñas" ///
		13 "P/S Chanmagua" ///
		14 "P/S Timushan" ///
		15 "P/S Horcones"

	label values  health_facility health_facility_label
	tab health_facility

	* Show the visit date as a daily date
	format %td visit_date

* Keep an untouched copy of the name as entered, before the accent repair

	clonevar name_redcap_raw = ds5

	* correct_encoding <varname>
	*
	* Replaces, in the string variable passed as the first argument, each
	* mis-encoded accented-letter sequence listed in the local "old" with the
	* unaccented ASCII letter at the same position in the local "new".
	* The variable is modified in place; nothing is returned.
	*
	* Any existing definition is dropped first so the do file can be re-run.
	cap program drop correct_encoding
	program define correct_encoding
		* List of incorrect and correct characters
		* NOTE(review): word 8 of the old list is a bare capital A-tilde. Because
		* subinstr below replaces every occurrence, that pass rewrites all
		* remaining A-tilde characters, so the longer sequences at words 9-14
		* can no longer match afterwards. Consider processing the longer
		* sequences first.
		* NOTE(review): words 8, 10 and 11 look identical here and word 13
		* contains a backquote, which Stata may parse as the start of a local
		* macro reference. The second byte of these sequences may have been
		* lost when the file was saved - verify against real data.
		local old "Ã¡ Ã© Ã­ Ã³ Ãº Ã± Ã¼ Ã Ã‰ Ã Ã Ãš Ã` Ãœ"
		local new "a e i o u n u A E I O U N U"

		* Replace each incorrect character with the correct one
		* (14 = number of words in each list; the lists are matched by position)
		forvalues i = 1/14 {
			local char_old: word `i' of `old'
			local char_new: word `i' of `new'
			* The "." asks subinstr to replace all occurrences
			replace `1' = subinstr(`1', "`char_old'", "`char_new'", .)
		}
	end

	* Repair the mis-encoded accents in the patient name (ds5 is the only
	* variable that needs it), then give the field its final name
	correct_encoding ds5
	
rename ds5 name_patient

* Restrict to visits dated 12 Nov 2023 through 26 Apr 2024 (inclusive)
	gsort + visit_date // first date is Nov 8, 2023. 

	* Cutoffs as Stata numeric daily dates
	local start_date = td(12nov2023)
	local end_date = td(26apr2024)

	* Rows outside the window, and rows with a missing visit date, are removed
	keep if inrange(visit_date, `start_date', `end_date')

	* Optional visual check of the earliest remaining dates
	list visit_date in 1/10

save "/Users/dcflood/Library/CloudStorage/Dropbox-UniversityofMichigan/David Flood/HEARTS piloto 2023/Datos - SIGSA/Reportes SIGSA/Stata do files/K23 pilot analysis/Temp files/redcap chiquimula.dta", replace	
	
* export delimited using /Users/dcflood/Desktop/redcap.csv, replace nolabel

* Fuzzy matching of SIGSA and REDCap records (code drafted with ChatGPT) --------
*
* Names are matched approximately with -matchit-; candidate pairs are then kept
* only when the visit date and the health facility agree exactly.
*
* Required user-written packages:
// 	ssc install reclink
// 	ssc install matchit
// 	ssc install freqindex

	* Load SIGSA dataset
	use "/Users/dcflood/Library/CloudStorage/Dropbox-UniversityofMichigan/David Flood/HEARTS piloto 2023/Datos - SIGSA/Reportes SIGSA/Stata do files/K23 pilot analysis/Temp files/sigsa chiquimula.dta", clear

	* Prepare the data by standardizing the name field (trimmed, lower case)
	* and giving each SIGSA row a sequential id for matchit
	gen name_patient_clean_sigsa = lower(trim(name_patient))
	gen idmaster_sigsa=_n

	* Save the cleaned SIGSA data. This exact file (temp_sigsa_cleaned_chiquimula.dta)
	* must be the one merged back below, because idmaster_sigsa is created here.
	save "/Users/dcflood/Library/CloudStorage/Dropbox-UniversityofMichigan/David Flood/HEARTS piloto 2023/Datos - SIGSA/Reportes SIGSA/Stata do files/K23 pilot analysis/Temp files/temp_sigsa_cleaned_chiquimula.dta", replace

	* Load RedCap dataset
	use "/Users/dcflood/Library/CloudStorage/Dropbox-UniversityofMichigan/David Flood/HEARTS piloto 2023/Datos - SIGSA/Reportes SIGSA/Stata do files/K23 pilot analysis/Temp files/redcap chiquimula.dta", clear

	* Prepare the RedCap data in the same way. The date and facility are cloned
	* under _redcap names so they survive the merges below, where the SIGSA
	* variables of the same name take precedence.
	gen name_patient_clean_redcap = lower(trim(name_patient))
	gen idmaster_redcap=_n
	clonevar visit_date_redcap = visit_date
	clonevar health_facility_redcap = health_facility

	* Save the cleaned RedCap data
	save "/Users/dcflood/Library/CloudStorage/Dropbox-UniversityofMichigan/David Flood/HEARTS piloto 2023/Datos - SIGSA/Reportes SIGSA/Stata do files/K23 pilot analysis/Temp files/temp_redcap_cleaned.dta", replace

	* Use matchit to perform fuzzy matching
	use "/Users/dcflood/Library/CloudStorage/Dropbox-UniversityofMichigan/David Flood/HEARTS piloto 2023/Datos - SIGSA/Reportes SIGSA/Stata do files/K23 pilot analysis/Temp files/temp_sigsa_cleaned_chiquimula.dta", clear
	
	* From here on, file names are relative to the temp folder
	cd "/Users/dcflood/Library/CloudStorage/Dropbox-UniversityofMichigan/David Flood/HEARTS piloto 2023/Datos - SIGSA/Reportes SIGSA/Stata do files/K23 pilot analysis/Temp files"
	
	matchit idmaster_sigsa name_patient_clean_sigsa ///
		using temp_redcap_cleaned.dta, ///
		idusing(idmaster_redcap) ///
		txtusing(name_patient_clean_redcap) ///
		threshold(0.65) ///
		diagnose
	
	* Merging in all the SIGSA data (the file saved above, whose idmaster_sigsa
	* values are the ones matchit just used)
	merge m:1 idmaster_sigsa using temp_sigsa_cleaned_chiquimula.dta, nogenerate
	
	* Merging in all the RedCAP data
	merge m:1 idmaster_redcap using temp_redcap_cleaned.dta, nogenerate
	
	drop if idmaster_sigsa == .  // drop REDCap rows that have no SIGSA candidate
	
	* Require an exact visit-date match: drop candidate pairs whose dates differ
	gen days_diff = abs(visit_date - visit_date_redcap)
	drop if days_diff > 0 & !missing(visit_date_redcap)
	
	drop if health_facility != health_facility_redcap & !missing(health_facility_redcap) // drop if health facilities do not align
	
	duplicates tag idmaster_sigsa, gen(dup_idmaster_sigsa)
	tab dup_idmaster_sigsa // no duplicates but we have deleted some of the initial records in the code above
	
	save sigsa_cleaned_with_redcap_missing_some_sigsa.dta, replace
	
	* Merging back the missing records from SIGSA: start from the full cleaned
	* SIGSA file and attach the matched REDCap information by idmaster_sigsa
	use temp_sigsa_cleaned_chiquimula.dta, clear
	merge 1:1 idmaster_sigsa using sigsa_cleaned_with_redcap_missing_some_sigsa.dta
	
* Actually doing the calculations ----------------------------------------------	
	
	* Mean values among enrolled patients
	sum sbp if htn_enroll_any == 1
	sum dbp if htn_enroll_any == 1
	sum glucose if dm_enroll_any == 1
	
	* Proportions with control
	* Blood pressure: defined only for hypertension enrollees with both readings
	* recorded; controlled = systolic below 140 and diastolic below 90
	gen bp_control = 0 if htn_enroll_any == 1 & !missing(sbp) & !missing(dbp)
	replace bp_control = 1 if htn_enroll_any == 1 & sbp<140 & dbp<90
	tab bp_control
	
	* Glucose: defined only for diabetes enrollees with a recorded glucose;
	* controlled = glucose below 160
	gen dm_control = 0 if dm_enroll_any == 1 & !missing(glucose)
	replace dm_control = 1 if dm_enroll_any == 1 & glucose <160 // fasting not recorded, so assuming all are random
	tab dm_control
	
	* Visits captured in electronic monitoring tools, %
	* Denominator: visits with htn_med_any == 1 or dm_med_any == 1.
	* Captured = a complete blood pressure (systolic AND diastolic, the same
	* completeness rule as bp_control) for htn_med_any visits, or a glucose
	* value for dm_med_any visits.
	gen non_missing_biomarker = 0 if htn_med_any == 1 | dm_med_any == 1
	replace non_missing_biomarker = 1 if !missing(sbp) & !missing(dbp) & htn_med_any == 1
	replace non_missing_biomarker = 1 if !missing(glucose) & dm_med_any == 1
	tab non_missing_biomarker
	
/*******************************************************************************
9. DHIS2 data analysis with SIGSA - Sololá
*******************************************************************************/

// * Importing and saving results
//
// 	import delimited "/Users/dcflood/Library/CloudStorage/Dropbox-UniversityofMichigan/David Flood/HEARTS piloto 2023/Datos - DHIS2/demographics.csv", clear 
//
// 	save "/Users/dcflood/Library/CloudStorage/Dropbox-UniversityofMichigan/David Flood/HEARTS piloto 2023/Datos - DHIS2/demographics.dta", replace
//
// 	import delimited "/Users/dcflood/Library/CloudStorage/Dropbox-UniversityofMichigan/David Flood/HEARTS piloto 2023/Datos - DHIS2/initial_visit.csv", clear 
//
// 	save "/Users/dcflood/Library/CloudStorage/Dropbox-UniversityofMichigan/David Flood/HEARTS piloto 2023/Datos - DHIS2/initial_visit.dta", replace
//
// 	import delimited "/Users/dcflood/Library/CloudStorage/Dropbox-UniversityofMichigan/David Flood/HEARTS piloto 2023/Datos - DHIS2/follow_up_visit.csv", clear 
//
// 	save "/Users/dcflood/Library/CloudStorage/Dropbox-UniversityofMichigan/David Flood/HEARTS piloto 2023/Datos - DHIS2/follow_up_visit.dta", replace
	
* Combining the DHIS2 exports: demographics and initial visits are merged
* one-to-one on htaprogramid, then follow-up visits are appended as extra rows
use "/Users/dcflood/Library/CloudStorage/Dropbox-UniversityofMichigan/David Flood/HEARTS piloto 2023/Datos - DHIS2/demographics.dta", clear

merge 1:1 htaprogramid using "/Users/dcflood/Library/CloudStorage/Dropbox-UniversityofMichigan/David Flood/HEARTS piloto 2023/Datos - DHIS2/initial_visit.dta", generate(initial_visit)

* Every row present at this point is labelled as an initial visit
generate visit = "initial_visit"

append using "/Users/dcflood/Library/CloudStorage/Dropbox-UniversityofMichigan/David Flood/HEARTS piloto 2023/Datos - DHIS2/follow_up_visit.dta", generate(follow_up_visit)

* Rows whose visit label is still empty after the append are follow-up visits
replace visit = "not initial visit" if missing(visit)

tabulate visit, missing

* Restrict to the identifiers, dates, blood pressure and glucose fields used below
keep htaprogramid nombre apellido organisationunitname ///
	incidentdate enrollmentdate reporteinicialdecaso fechadevisita visit ///
	htapresiónsistólica1 htapresiónsistólica2 ///
	htapresióndiastólica1 htapresióndiastólica2 ///
	dmmedicióndeglucosa dmtieneayuno

* Dropping test patients (organisation units whose name ends in PRUEBAS)
drop if inlist(organisationunitname, ///
	"C/S Aldea Tzununá PRUEBAS", ///
	"C/S San Pablo La Laguna PRUEBAS", ///
	"CAP San Marcos La Laguna PRUEBAS", ///
	"CAP Santa Cruz La Laguna PRUEBAS", ///
	"CC Chuitzanchaj PRUEBAS", ///
	"P/S Jaibalito PRUEBAS")

* Dropping CC Chuitzanchaj patients
drop if organisationunitname == "CC Chuitzanchaj"

* One visit-date string for every row: where fechadevisita is empty, fall back
* to reporteinicialdecaso
replace fechadevisita = reporteinicialdecaso if missing(fechadevisita)

* Put identifiers, visit details and biomarkers first for easier browsing
order htaprogramid nombre apellido fechadevisita visit organisationunitname ///
	htapresiónsistólica1 htapresiónsistólica2 ///
	htapresióndiastólica1 htapresióndiastólica2 ///
	dmtieneayuno dmmedicióndeglucosa

sort htaprogramid fechadevisita

* Convert the first 10 characters of the date string (read as year-month-day)
* into a Stata daily date
generate visit_date = date(substr(fechadevisita, 1, 10), "YMD")
format visit_date %td

tabulate organisationunitname

//
//           Organisation unit name |      Freq.     Percent        Cum.
// ---------------------------------+-----------------------------------
//                C/S Aldea Tzununá |          9        1.34        1.34
//          C/S San Pablo La Laguna |        438       65.18       66.52
//         CAP San Marcos La Laguna |         80       11.90       78.42
//         CAP Santa Cruz La Laguna |        139       20.68       99.11
//                    P/S Jaibalito |          6        0.89      100.00
// ---------------------------------+-----------------------------------
//                            Total |        672      100.00


* Prepping different variables to align with SIGSA data for the merge

* Numeric facility code for each organisation unit; any other name stays missing
generate health_facility = ///
	cond(organisationunitname == "C/S San Pablo La Laguna",  21, ///
	cond(organisationunitname == "CAP San Marcos La Laguna", 32, ///
	cond(organisationunitname == "CAP Santa Cruz La Laguna", 43, ///
	cond(organisationunitname == "P/S Jaibalito",            44, ///
	cond(organisationunitname == "C/S Aldea Tzununá",        45, .)))))
tabulate health_facility, missing

* Blood pressure: row-wise mean of the two recorded readings, after which the
* individual readings are no longer needed
egen sbp = rowmean(htapresiónsistólica1 htapresiónsistólica2)
egen dbp = rowmean(htapresióndiastólica1 htapresióndiastólica2)
drop htapresiónsistólica1 htapresiónsistólica2 ///
	htapresióndiastólica1 htapresióndiastólica2

rename dmmedicióndeglucosa glucose

* Fasting indicator: 1 for "Si", 0 for "No"
* NOTE(review): only these two exact strings are recoded; any other spelling
* stays missing - check the tabulation below
generate fasting = cond(dmtieneayuno == "Si", 1, cond(dmtieneayuno == "No", 0, .))
tabulate fasting
drop dmtieneayuno
	
* Fix patient names: trimmed first name, a space, trimmed surname; the second
* step removes the stray space left when either part is empty
generate name_patient = strtrim(nombre) + " " + strtrim(apellido)
replace name_patient = strtrim(name_patient)
list name_patient in 1/10


* Restrict to visits dated October 5, 2023 through April 5, 2024 (inclusive)
	sort visit_date // there are many dates before and after these dates, since we provided DHIS in LINKS and continued to support it after

	* Window boundaries as Stata daily dates
	local start_date = mdy(10, 5, 2023)
	local end_date = mdy(4, 5, 2024)

	* Keep only rows inside the window; rows with a missing visit_date are
	* removed as well
	keep if inrange(visit_date, `start_date', `end_date')

	* Quick visual check of the remaining dates
	list visit_date in 1/10

* Keeping only relevant variables
keep htaprogramid name_patient health_facility visit visit_date sbp dbp glucose fasting

save "/Users/dcflood/Library/CloudStorage/Dropbox-UniversityofMichigan/David Flood/HEARTS piloto 2023/Datos - SIGSA/Reportes SIGSA/Stata do files/K23 pilot analysis/Temp files/dhis solola.dta", replace

* CHAT GPT fuzzy matching code -------------------------------------------------

// 	ssc install reclink
// 	ssc install matchit
// 	ssc install freqindex

	* SIGSA side: lower-cased, trimmed patient name plus a row-number identifier
	use "/Users/dcflood/Library/CloudStorage/Dropbox-UniversityofMichigan/David Flood/HEARTS piloto 2023/Datos - SIGSA/Reportes SIGSA/Stata do files/K23 pilot analysis/Temp files/sigsa solola.dta", clear

	generate name_patient_clean_sigsa = strlower(strtrim(name_patient))
	generate idmaster_sigsa = _n

	save "/Users/dcflood/Library/CloudStorage/Dropbox-UniversityofMichigan/David Flood/HEARTS piloto 2023/Datos - SIGSA/Reportes SIGSA/Stata do files/K23 pilot analysis/Temp files/temp_sigsa_cleaned_solola.dta", replace

	* DHIS2 side: same name cleaning and identifier, plus _dhis2 copies of the
	* visit date and facility code (both files carry variables with those names)
	use "/Users/dcflood/Library/CloudStorage/Dropbox-UniversityofMichigan/David Flood/HEARTS piloto 2023/Datos - SIGSA/Reportes SIGSA/Stata do files/K23 pilot analysis/Temp files/dhis solola.dta", clear

	generate name_patient_clean_dhis2 = strlower(strtrim(name_patient))
	generate idmaster_dhis2 = _n
	clonevar visit_date_dhis2 = visit_date
	clonevar health_facility_dhis2 = health_facility

	save "/Users/dcflood/Library/CloudStorage/Dropbox-UniversityofMichigan/David Flood/HEARTS piloto 2023/Datos - SIGSA/Reportes SIGSA/Stata do files/K23 pilot analysis/Temp files/temp_dhis2_cleaned.dta", replace

	* Use matchit to perform fuzzy matching
	* The cleaned SIGSA file is the master dataset; the cleaned DHIS2 file is the
	* using dataset. Patient names are compared between the two.
	use "/Users/dcflood/Library/CloudStorage/Dropbox-UniversityofMichigan/David Flood/HEARTS piloto 2023/Datos - SIGSA/Reportes SIGSA/Stata do files/K23 pilot analysis/Temp files/temp_sigsa_cleaned_solola.dta", clear
	
	* Change the working directory so the relative file names below resolve to the
	* Temp files folder (this stays in effect for the rest of the do file)
	cd "/Users/dcflood/Library/CloudStorage/Dropbox-UniversityofMichigan/David Flood/HEARTS piloto 2023/Datos - SIGSA/Reportes SIGSA/Stata do files/K23 pilot analysis/Temp files"
	
	* matchit is a community-contributed command (see the ssc install lines above).
	* NOTE(review): threshold(0.90) presumably keeps only name pairs with a
	* similarity score of at least 0.90 - confirm against the matchit help file.
	matchit idmaster_sigsa name_patient_clean_sigsa ///
		using temp_dhis2_cleaned.dta, ///
		idusing(idmaster_dhis2) ///
		txtusing(name_patient_clean_dhis2) ///
		threshold(0.90) ///
		diagnose
	
	* Merging in all the SIGSA data
	* (m:1 because one SIGSA id can appear in several candidate pairs)
	merge m:1 idmaster_sigsa using temp_sigsa_cleaned_solola.dta, nogenerate
	
	* Merging in all the DHIS2 data
	* (m:1 because one DHIS2 id can appear in several candidate pairs)
	merge m:1 idmaster_dhis2 using temp_dhis2_cleaned.dta, nogenerate
	
	* Rows with no SIGSA id are DHIS2 records brought in by the merge above
	* without any SIGSA counterpart; they are not needed
	drop if idmaster_sigsa == .  // drop all missing values for sigsa 
	
	* Keep a matched pair only when visit_date and visit_date_dhis2 fall on the
	* same day (zero-day tolerance). Rows without a DHIS2 visit date are untouched.
	generate days_diff = abs(visit_date - visit_date_dhis2)
	drop if !missing(visit_date_dhis2) & days_diff > 0

	* Likewise discard pairs whose health facility codes do not align
	drop if !missing(health_facility_dhis2) & health_facility_dhis2 != health_facility

	* Flag any idmaster_sigsa value that still appears on more than one row
	duplicates tag idmaster_sigsa, generate(dup_idmaster_sigsa)
	tabulate dup_idmaster_sigsa // there are 8 duplicates

	sort idmaster_sigsa visit

	* Resolve the duplicated pairs: remove the follow-up row (the duplicates are
	* caused mostly by duplicate DHIS2 data) and the "ana poron quiacain" row
	* (this looks like an incorrect match).
	* NOTE(review): if both rows of a pair are "not initial visit", both are
	* removed and the SIGSA record returns below without DHIS2 values - confirm
	* this is intended.
	drop if dup_idmaster_sigsa == 1 & ///
		(visit == "not initial visit" | name_patient_clean_dhis2 == "ana poron quiacain")

	save sigsa_solola_cleaned_dhis_missing_some_sigsa.dta, replace

	* Reload the full SIGSA file and attach the surviving matched rows, so that
	* the records dropped above come back (without the DHIS2 variables)
	use temp_sigsa_cleaned_solola.dta, clear
	merge 1:1 idmaster_sigsa using sigsa_solola_cleaned_dhis_missing_some_sigsa.dta
	
* Actually doing the calculations ----------------------------------------------

	order name_patient visit_date htn_med_any dm_med_any htn_enroll_any dm_enroll_any health_facility sbp dbp glucose fasting

	* Mean values (restricted to rows flagged htn_enroll_any / dm_enroll_any)
	sum sbp if htn_enroll_any == 1
	sum dbp if htn_enroll_any == 1
	sum glucose if dm_enroll_any == 1

	* Proportions with control
	* bp_control is defined only when both sbp and dbp are recorded; it is 1 when
	* sbp < 140 and dbp < 90. Stata treats missing as larger than any number, so
	* the "<" comparisons can never be true for a missing reading.
	gen bp_control = 0 if htn_enroll_any == 1 & !missing(sbp) & !missing(dbp)
	replace bp_control = 1 if htn_enroll_any == 1 & sbp<140 & dbp<90
	tab bp_control

	* dm_control is defined only when glucose is recorded; it is 1 when glucose < 160.
	* NOTE(review): this dataset carries the DHIS2 fasting flag, but it is not
	* used here; every glucose value is judged against the single <160 cut-off.
	gen dm_control = 0 if dm_enroll_any == 1 & !missing(glucose)
	replace dm_control = 1 if dm_enroll_any == 1 & glucose <160
	tab dm_control

	* Visits captured in electronic monitoring tools, %
	* Denominator: rows with htn_med_any == 1 or dm_med_any == 1.
	* Numerator: a complete blood pressure (both sbp and dbp) for htn_med_any rows,
	* or a glucose value for dm_med_any rows.
	* NOTE(review): the blood pressure test previously checked sbp twice and never
	* dbp; it now matches the bp_control definition above. Re-check the
	* non_missing_biomarker counts hard-coded in section 10 after re-running.
	gen non_missing_biomarker = 0 if htn_med_any == 1 | dm_med_any == 1
	replace non_missing_biomarker = 1 if !missing(sbp) & !missing(dbp) & htn_med_any == 1
	replace non_missing_biomarker = 1 if !missing(glucose) & dm_med_any == 1
	tab non_missing_biomarker
	
	
/*******************************************************************************
10. Final calculations of visits captured in DHIS2/fichas and DM/HTN control
*******************************************************************************/

* Pooled proportions across the two sites. The counts are copied by hand from
* the per-site tabulations and must be updated if those results change.

* Hypertension control (bp_control)
*   Chiquimula: 15/22
*   Solola:     27/61
display (15 + 27) / (22 + 61)

* Diabetes control (dm_control)
*   Chiquimula: 8/12
*   Solola:     8/27
display (8 + 8) / (12 + 27)

* Visits captured (non_missing_biomarker)
*   Chiquimula: 24/526
*   Solola:     80/848
display (24 + 80) / (526 + 848)



